{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import statsmodels.api as sm\n",
    "from  matplotlib import pyplot\n",
    "from sklearn.preprocessing import PolynomialFeatures"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the input data. The CSV must contain a 'date' column (used as the\n",
    "# index below); every other column is passed to the scaler.\n",
    "df = pd.read_csv('MLR_InputFile.csv')\n",
    "\n",
    "# Positional split without shuffling: the first 85% of the rows form the\n",
    "# training set and the remaining rows form the test set.\n",
    "train_proportion = 0.85\n",
    "train_len = int(len(df) * train_proportion)\n",
    "# Derive the test size from the split itself; truncating\n",
    "# len(df)*(1-train_proportion) can be off by one from the real row count.\n",
    "test_len = len(df) - train_len\n",
    "\n",
    "Train_data = df.iloc[:train_len].set_index('date')\n",
    "Test_data = df.iloc[train_len:].set_index('date')\n",
    "col_name = Train_data.columns\n",
    "col_test_name = Test_data.columns\n",
    "\n",
    "# Normalization\n",
    "from sklearn.preprocessing import MinMaxScaler\n",
    "scalar = MinMaxScaler()\n",
    "# Fit the scaler on the training rows only and reuse those parameters for\n",
    "# the test rows. Refitting on the test set would put the two sets on\n",
    "# different scales and leak test information into the preprocessing.\n",
    "# Test values outside the training range can therefore fall outside [0, 1].\n",
    "train_scaled = scalar.fit_transform(Train_data)\n",
    "test_scaled = scalar.transform(Test_data)\n",
    "\n",
    "# Pass the column Index directly: wrapping it in a list builds a one-level\n",
    "# MultiIndex, which makes labels show up as tuples such as ('Mon',).\n",
    "train_scaled = pd.DataFrame(train_scaled, columns=col_name)\n",
    "test_scaled = pd.DataFrame(test_scaled, columns=col_test_name)\n",
    "\n",
    "# Save the normalized train and test data (no index column is written)\n",
    "train_scaled.to_csv('Normalized_TrainData_FHV.csv', index=False)\n",
    "test_scaled.to_csv('Normalized_TestData_FHV.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                            OLS Regression Results                            \n",
      "==============================================================================\n",
      "Dep. Variable:            yellow taxi   R-squared:                       0.573\n",
      "Model:                            OLS   Adj. R-squared:                  0.569\n",
      "Method:                 Least Squares   F-statistic:                     137.3\n",
      "Date:                Tue, 31 Mar 2020   Prob (F-statistic):          1.87e-163\n",
      "Time:                        16:37:16   Log-Likelihood:                 824.47\n",
      "No. Observations:                 931   AIC:                            -1629.\n",
      "Df Residuals:                     921   BIC:                            -1581.\n",
      "Df Model:                           9                                         \n",
      "Covariance Type:            nonrobust                                         \n",
      "================================================================================\n",
      "                   coef    std err          t      P>|t|      [0.025      0.975]\n",
      "--------------------------------------------------------------------------------\n",
      "const            0.7640      0.012     64.337      0.000       0.741       0.787\n",
      "('Mon',)        -0.0362      0.013     -2.885      0.004      -0.061      -0.012\n",
      "('Tue',)         0.0518      0.012      4.204      0.000       0.028       0.076\n",
      "('Wed',)         0.0951      0.012      7.708      0.000       0.071       0.119\n",
      "('Thr',)         0.1515      0.012     12.256      0.000       0.127       0.176\n",
      "('Fri',)         0.1445      0.012     11.643      0.000       0.120       0.169\n",
      "('Sun',)        -0.1754      0.013    -13.970      0.000      -0.200      -0.151\n",
      "('SNOW',)       -0.8224      0.082    -10.047      0.000      -0.983      -0.662\n",
      "('Holiday',)    -0.3401      0.024    -14.217      0.000      -0.387      -0.293\n",
      "('FHV',)        -0.3289      0.018    -17.853      0.000      -0.365      -0.293\n",
      "==============================================================================\n",
      "Omnibus:                      310.043   Durbin-Watson:                   0.696\n",
      "Prob(Omnibus):                  0.000   Jarque-Bera (JB):             1264.834\n",
      "Skew:                          -1.528   Prob(JB):                    2.21e-275\n",
      "Kurtosis:                       7.824   Cond. No.                         28.4\n",
      "==============================================================================\n",
      "\n",
      "Warnings:\n",
      "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n"
     ]
    }
   ],
   "source": [
    "# Regressors and response for the linear model, taken from the scaled\n",
    "# training frame. Saturday is not among the day columns, so it presumably\n",
    "# serves as the baseline day -- confirm against the input file.\n",
    "predictor_cols = ['Mon','Tue','Wed','Thr','Fri','Sun','SNOW','Holiday','FHV']\n",
    "# add_constant inserts the 'const' column so the fit includes an intercept\n",
    "input_X = sm.add_constant(train_scaled[predictor_cols])\n",
    "output_Y = train_scaled['yellow taxi']\n",
    "\n",
    "# Ordinary least squares: build the model, fit it, and print the report\n",
    "model = sm.OLS(output_Y, input_X)\n",
    "linear_model = model.fit()\n",
    "print(linear_model.summary())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[13.038111839232513,\n",
       " 1.785847247054203,\n",
       " 1.7202561097074047,\n",
       " 1.724936368686709,\n",
       " 1.7290354976965425,\n",
       " 1.7439406324618347,\n",
       " 1.783987827460511,\n",
       " 1.0208168805497386,\n",
       " 1.0578183009515234,\n",
       " 1.1554569411913138]"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Multicollinearity check: one variance inflation factor per column of the\n",
    "# design matrix, in column order (the first entry belongs to 'const').\n",
    "from statsmodels.stats.outliers_influence import variance_inflation_factor\n",
    "# variance_inflation_factor(exogenous ndarray, index of the column to assess)\n",
    "exog_matrix = input_X.values\n",
    "[variance_inflation_factor(exog_matrix, col_idx) for col_idx in range(exog_matrix.shape[1])]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
